# IMPORT LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import calendar
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
#!pip install plotly
# Load the state-level unemployment dataset from its local CSV export.
csv_path = r"C:\Users\khare\OneDrive\Desktop\python projects\Unemployment_Rate_upto_11_2020.csv"
data = pd.read_csv(csv_path)
# Leave the frame as the last expression so the notebook renders it.
data
| Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.740 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.740 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.740 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.740 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.740 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 262 | West Bengal | 30-06-2020 | M | 7.29 | 30726310 | 40.39 | East | 22.9868 | 87.855 |
| 263 | West Bengal | 31-07-2020 | M | 6.83 | 35372506 | 46.17 | East | 22.9868 | 87.855 |
| 264 | West Bengal | 31-08-2020 | M | 14.87 | 33298644 | 47.48 | East | 22.9868 | 87.855 |
| 265 | West Bengal | 30-09-2020 | M | 9.35 | 35707239 | 47.73 | East | 22.9868 | 87.855 |
| 266 | West Bengal | 31-10-2020 | M | 9.98 | 33962549 | 45.63 | East | 22.9868 | 87.855 |
267 rows × 9 columns
# Replace the raw CSV headers with shorter, consistently worded names.
# The assignment is positional, so this list must follow the file's column order.
renamed_columns = [
    "State",
    "Date",
    "Frequency",
    "Estimated unemployment rate",
    "Estimated employed",
    "Estimated labour participation rate",
    "Region",
    "Longitude",
    "Latitude",
]
data.columns = renamed_columns
# Show the last rows to confirm the new headers.
data.tail()
| State | Date | Frequency | Estimated unemployment rate | Estimated employed | Estimated labour participation rate | Region | Longitude | Latitude | |
|---|---|---|---|---|---|---|---|---|---|
| 262 | West Bengal | 30-06-2020 | M | 7.29 | 30726310 | 40.39 | East | 22.9868 | 87.855 |
| 263 | West Bengal | 31-07-2020 | M | 6.83 | 35372506 | 46.17 | East | 22.9868 | 87.855 |
| 264 | West Bengal | 31-08-2020 | M | 14.87 | 33298644 | 47.48 | East | 22.9868 | 87.855 |
| 265 | West Bengal | 30-09-2020 | M | 9.35 | 35707239 | 47.73 | East | 22.9868 | 87.855 |
| 266 | West Bengal | 31-10-2020 | M | 9.98 | 33962549 | 45.63 | East | 22.9868 | 87.855 |
# Dimensions of the frame as (rows, columns).
data.shape
(267, 9)
# List the column labels after the rename.
data.columns
Index(['State', 'Date', 'Frequency', 'Estimated unemployment rate',
'Estimated employed', 'Estimated labour participation rate', 'Region',
'Longitude', 'Latitude'],
dtype='object')
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
| Estimated unemployment rate | Estimated employed | Estimated labour participation rate | Longitude | Latitude | |
|---|---|---|---|---|---|
| count | 267.000000 | 2.670000e+02 | 267.000000 | 267.000000 | 267.000000 |
| mean | 12.236929 | 1.396211e+07 | 41.681573 | 22.826048 | 80.532425 |
| std | 10.803283 | 1.336632e+07 | 7.845419 | 6.270731 | 5.831738 |
| min | 0.500000 | 1.175420e+05 | 16.770000 | 10.850500 | 71.192400 |
| 25% | 4.845000 | 2.838930e+06 | 37.265000 | 18.112400 | 76.085600 |
| 50% | 9.650000 | 9.732417e+06 | 40.390000 | 23.610200 | 79.019300 |
| 75% | 16.755000 | 2.187869e+07 | 44.055000 | 27.278400 | 85.279900 |
| max | 75.850000 | 5.943376e+07 | 69.690000 | 33.778200 | 92.937600 |
# Count missing values in each column.
data.isnull().sum()
State 0 Date 0 Frequency 0 Estimated unemployment rate 0 Estimated employed 0 Estimated labour participation rate 0 Region 0 Longitude 0 Latitude 0 dtype: int64
# True if any row is an exact duplicate of an earlier row.
data.duplicated().any()
np.False_
# Number of rows recorded for each state.
data.State.value_counts()
State Andhra Pradesh 10 Assam 10 Bihar 10 Chhattisgarh 10 Delhi 10 Goa 10 Gujarat 10 Haryana 10 Himachal Pradesh 10 Jharkhand 10 Karnataka 10 Rajasthan 10 Kerala 10 Madhya Pradesh 10 Maharashtra 10 Meghalaya 10 Odisha 10 Puducherry 10 Punjab 10 Uttarakhand 10 Tamil Nadu 10 Telangana 10 Tripura 10 West Bengal 10 Uttar Pradesh 10 Jammu & Kashmir 9 Sikkim 8 Name: count, dtype: int64
# Number of rows recorded for each region.
data.Region.value_counts()
Region North 79 South 60 West 50 East 40 Northeast 38 Name: count, dtype: int64
# Normalise the headers, parse the date column and mark label columns as categorical.

# Header clean-up: trim surrounding whitespace and lower-case every column name.
data.columns = [name.strip().lower() for name in data.columns]

# Dates are written day-first (e.g. 31-01-2020); values that fail to parse become NaT.
data['date'] = pd.to_datetime(data['date'], dayfirst=True, errors='coerce')

# Store the repeated label columns as pandas categoricals.
for label_col in ('frequency', 'region'):
    data[label_col] = data[label_col].astype('category')
📅 Extracting Month From Date Attribute
# Derive month helper columns from the parsed date.
# Month number as a plain Python int (1-12).
month_numbers = data['date'].dt.month.apply(int)
# Abbreviated month name ("Jan", "Feb", ...) looked up from the month number.
month_labels = month_numbers.apply(lambda m: calendar.month_abbr[m])

# Later cells use all three names, so the label is stored under both
# 'Month_name' and 'Month'.
data['Month_int'] = month_numbers
data['Month_name'] = month_labels
data['Month'] = month_labels
data.tail()
| state | date | frequency | estimated unemployment rate | estimated employed | estimated labour participation rate | region | longitude | latitude | Month_int | Month_name | Month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 262 | West Bengal | 2020-06-30 | M | 7.29 | 30726310 | 40.39 | East | 22.9868 | 87.855 | 6 | Jun | Jun |
| 263 | West Bengal | 2020-07-31 | M | 6.83 | 35372506 | 46.17 | East | 22.9868 | 87.855 | 7 | Jul | Jul |
| 264 | West Bengal | 2020-08-31 | M | 14.87 | 33298644 | 47.48 | East | 22.9868 | 87.855 | 8 | Aug | Aug |
| 265 | West Bengal | 2020-09-30 | M | 9.35 | 35707239 | 47.73 | East | 22.9868 | 87.855 | 9 | Sep | Sep |
| 266 | West Bengal | 2020-10-31 | M | 9.98 | 33962549 | 45.63 | East | 22.9868 | 87.855 | 10 | Oct | Oct |
📊 Exploratory Data Analysis
# Summary statistics for the three headline metrics, one row per metric.
metric_columns = [
    'estimated unemployment rate',
    'estimated employed',
    'estimated labour participation rate',
]
data_stats = data[metric_columns]
# Transpose so each metric is a row, then round to two decimals for display.
data_stats.describe().T.round(2)
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| estimated unemployment rate | 267.0 | 12.24 | 10.80 | 0.50 | 4.84 | 9.65 | 16.76 | 75.85 |
| estimated employed | 267.0 | 13962105.72 | 13366318.36 | 117542.00 | 2838930.50 | 9732417.00 | 21878686.00 | 59433759.00 |
| estimated labour participation rate | 267.0 | 41.68 | 7.85 | 16.77 | 37.26 | 40.39 | 44.06 | 69.69 |
# Mean of each headline metric per region, with 'region' kept as a regular column.
region_metric_columns = [
    'estimated unemployment rate',
    'estimated employed',
    'estimated labour participation rate',
]
region_stats = (
    data.groupby(['region'])[region_metric_columns]
    .mean()
    .reset_index()
)
# Round to two decimals for display only; region_stats itself is not modified.
region_stats.round(2)
| region | estimated unemployment rate | estimated employed | estimated labour participation rate | |
|---|---|---|---|---|
| 0 | East | 13.92 | 19602366.90 | 40.11 |
| 1 | North | 15.89 | 13072487.92 | 38.70 |
| 2 | Northeast | 10.95 | 3617105.53 | 52.06 |
| 3 | South | 10.45 | 14040589.33 | 40.44 |
| 4 | West | 8.24 | 18623512.72 | 41.26 |
🧠📊 Data Visualizations
🗺️📍State Wise Analysis in Every Month
# Average each headline metric per calendar month.
monthly_metric_columns = [
    'estimated unemployment rate',
    'estimated employed',
    'estimated labour participation rate',
]
IMD = data.groupby("Month")[monthly_metric_columns].mean().reset_index()

# groupby sorts the month abbreviations alphabetically (Apr, Aug, Feb, ...).
# Re-sort by calendar position so tables and charts built from IMD read
# chronologically.
month_position = {abbr: pos for pos, abbr in enumerate(calendar.month_abbr) if abbr}
IMD = IMD.sort_values(
    "Month", key=lambda col: col.map(month_position)
).reset_index(drop=True)
# Grouped bar chart: monthly unemployment rate next to labour participation rate.
import plotly.io as pio

# Render figures inline in a Jupyter notebook; use 'browser' outside Jupyter.
pio.renderers.default = 'notebook'

# Print the available columns so a mistyped name (typo) is easy to spot before plotting.
print(IMD.columns)

# Series used by the chart.
month = IMD["Month"]
unemployment_rate = IMD["estimated unemployment rate"]
labour_participation_rate = IMD["estimated labour participation rate"]

# One bar trace per metric: (values, legend label, bar colour).
bar_specs = [
    (unemployment_rate, "Unemployment Rate", 'indianred'),
    (labour_participation_rate, "Labour Participation Rate", 'lightskyblue'),
]

fig = go.Figure()
for series, label, colour in bar_specs:
    fig.add_trace(go.Bar(x=month, y=series, name=label, marker_color=colour))

# 'group' places the two bars for each month side by side.
fig.update_layout(
    title="Unemployment Rate vs Labour Participation Rate",
    xaxis_title="Month",
    yaxis_title="Percentage",
    barmode='group',
    template='plotly_white',
)

fig.show()
Index(['Month', 'estimated unemployment rate', 'estimated employed',
'estimated labour participation rate'],
dtype='object')
#!pip install -U kaleido
# Preview the first rows of the monthly averages table.
print(IMD.head())
Month estimated unemployment rate estimated employed \ 0 Apr 22.236154 1.057020e+07 1 Aug 10.313333 1.442904e+07 2 Feb 9.266154 1.548827e+07 3 Jan 9.196538 1.563720e+07 4 Jul 9.834444 1.441802e+07 estimated labour participation rate 0 35.297308 1 42.390741 2 44.180769 3 44.626538 4 42.274815
📊📈Bar Plot of Estimated Employed Citizens in Every Month
import plotly.express as px

# Explicit calendar order for the x axis and legend.
month_sequence = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct"]

# Bar chart of the average estimated number of employed people per month.
fig = px.bar(
    IMD,
    x='Month',
    y='estimated employed',
    color='Month',
    category_orders={"Month": month_sequence},
    title='Estimated employed people from Jan 2020 to Oct 2020',
)
fig.show()
Significant Impact of the Nationwide Lockdown:
The data clearly shows a sharp and widespread increase in the estimated unemployment rate across all regions of India during April and May 2020.
This directly correlates with the implementation of the nationwide lockdown to curb the spread of COVID-19. The unemployment rate peaked during these months, demonstrating the immediate and severe disruption to economic activities and job markets.
📍📊Regional Analysis
# Regional means of the three headline metrics. The result keeps the original
# variable name `State` even though it is grouped by region; the plot below
# does not use it.
State = (
    data.groupby("region")[
        ['estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']
    ]
    .mean()
    .reset_index()
)

# Box plot of the unemployment-rate distribution for every state.
fig = px.box(
    data,
    x='state',
    y='estimated unemployment rate',
    color='state',
    title='Unemployment rate',
)
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()
Significant Impact of the Nationwide Lockdown:
The data clearly shows a sharp and widespread increase in the estimated unemployment rate across all regions of India during April and May 2020. This directly correlates with the implementation of the nationwide lockdown to curb the spread of COVID-19. The unemployment rate peaked during these months, demonstrating the immediate and severe disruption to economic activities and job markets.
Varied Regional Impact and Recovery:
While all regions experienced a surge in unemployment during the lockdown, there were noticeable differences in the peak rates and recovery trajectories. The 'North' region generally showed higher unemployment rates during the peak lockdown period than the others, but also experienced a more pronounced recovery in the subsequent months. (This dataset's regions are North, South, East, West and Northeast; it has no 'Urban' category.) The average unemployment rate also varied significantly across regions, with some regions consistently showing higher overall unemployment than others throughout the period.
State-Level Disparities in Lockdown Impact:
The lockdown's impact on unemployment was not uniform across states. Some states experienced a much more drastic surge in unemployment than others.
Most Affected States: Puducherry, Jharkhand, Tamil Nadu, Bihar, and Karnataka recorded the largest increases in unemployment rates when comparing the lockdown period (April-June 2020) to the pre-lockdown period (January-March 2020). This suggests these states' economies or labor markets were particularly vulnerable to the lockdown measures.
States with Higher Overall Average Unemployment: Over the entire period, states like Haryana, Tripura, and Jammu & Kashmir consistently had higher average estimated unemployment rates.
States with Lower Overall Average Unemployment: Conversely, states such as Meghalaya, Sikkim, and Chhattisgarh maintained relatively lower average estimated unemployment rates, indicating a more stable employment scenario or different economic structures.
⚖️Average Unemployment Rate Bar Plot
# Mean unemployment rate per state, as a two-column frame:
# 'state' and 'avg_unemployment_rate'.
state_avg = (
    data.groupby('state')['estimated unemployment rate']
    .mean()
    .rename('avg_unemployment_rate')
    .reset_index()
)

# Bar chart of the state averages, tallest bar first.
fig = px.bar(
    state_avg,
    x='state',
    y='avg_unemployment_rate',
    color="state",
    title="Average Unemployment Rate by State",
)
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()
Haryana and Tripura had the highest average unemployment rates.
Meghalaya had the lowest average unemployment rate.
🔥📈Correlation Heatmap
# Pairwise correlations between the numeric columns.
corr_columns = [
    "estimated unemployment rate",
    "estimated employed",
    "estimated labour participation rate",
    'longitude',
    'latitude',
    'Month_int',
]
heat_maps = data[corr_columns].corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(10, 5))
sns.set_context("notebook", font_scale=1)
sns.heatmap(heat_maps, annot=True, cmap="coolwarm")
<Axes: >
🌠🧩Scatter Matrix
# Pairwise scatter plots of the three headline metrics, coloured by region.
scatter_dims = [
    'estimated unemployment rate',
    'estimated employed',
    'estimated labour participation rate',
]
fig = px.scatter_matrix(
    data,
    dimensions=scatter_dims,
    color='region',
    template='plotly',
)
fig.show()

# Save a static PNG copy (plotly's image export needs the kaleido package installed).
fig.write_image('unemployment_plot.png')
🔍 🤔Animated bar plot of Unemployment rate across region from Jan.2020 to Oct.2020
# Animated stacked bars: unemployment rate per region, coloured by state,
# with one animation frame per month.
fig = px.bar(
    data,
    x='region',
    y='estimated unemployment rate',
    color='state',
    animation_frame='Month_name',
    title='Unemployment rate across region from Jan.2020 to Oct.2020',
    height=700,
    template='plotly',
)
fig.update_layout(xaxis_categoryorder='total descending')
# Slow the play button down to 2000 ms per frame.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
☀️🌀Sunburst chart
# Sunburst of the mean unemployment rate: regions on the inner ring, states outside.
unemplo_df = data[['state', 'region', 'estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']]

# 'region' is a categorical column. On pandas versions where `observed`
# defaults to False, grouping by a categorical plus another key expands the
# result to every region x state combination and fills the pairs that never
# occur with NaN. observed=True keeps only the pairs present in the data.
unemplo = (
    unemplo_df.groupby(['region', 'state'], observed=True)['estimated unemployment rate']
    .mean()
    .reset_index()
)

# NOTE(review): color_continuous_scale only takes effect when a `color`
# column is also passed; left unchanged here to keep the chart as it was.
fig = px.sunburst(unemplo, path=['region', 'state'], values='estimated unemployment rate',
                  color_continuous_scale='Plasma', title='Unemployment rate in each region and state',
                  height=650, template='ggplot2')
fig.show()
🚧🚀Monthly Unemployment Rate
# Animated map: one bubble per state, sized by unemployment rate, one frame per month.
# NOTE: the dataset's coordinate columns are labelled the wrong way round. The
# 'longitude' column holds latitudes (roughly 10-34) and 'latitude' holds
# longitudes (roughly 71-93), so they are deliberately crossed here.
fig = px.scatter_geo(
    data,
    lat='longitude',
    lon='latitude',
    color="region",
    hover_name="state",
    size="estimated unemployment rate",
    animation_frame="Month_name",
    scope='asia',
    template='seaborn',
    title='Impact of lockdown on Employment across regions',
)
# Slow the play button down to 2000 ms per frame.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
# Zoom the map to the latitude/longitude window covering the plotted states.
fig.update_geos(lataxis_range=[5, 35], lonaxis_range=[65, 100], oceancolor="lightblue",
                showocean=True)
fig.show()
# Distinct region labels present in the data.
data.region.unique()
['South', 'Northeast', 'East', 'West', 'North'] Categories (5, object): ['East', 'North', 'Northeast', 'South', 'West']
📉😔Unemployment Rate Before and After Lockdown
# Mean unemployment rate per state before the lockdown (Jan-Mar) and in the
# first lockdown months (Apr-Jun).
before_lockdown = data[data['Month_int'].between(1, 3)]
after_lockdown = data[data['Month_int'].between(4, 6)]

af_lockdown = after_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()
lockdown = before_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()

# Join the two windows on the state name rather than on row position, so a
# state missing from one window cannot shift another state's value onto its row.
lockdown = lockdown.rename(
    columns={'estimated unemployment rate': 'Unemployment Rate Before Lockdown'}
).merge(
    af_lockdown.rename(
        columns={'estimated unemployment rate': 'Unemployment Rate After Lockdown'}
    ),
    on='state',
    how='left',  # keep every state that has pre-lockdown data
)
lockdown.head()
| state | Unemployment Rate Before Lockdown | Unemployment Rate After Lockdown | |
|---|---|---|---|
| 0 | Andhra Pradesh | 5.700000 | 13.750000 |
| 1 | Assam | 4.613333 | 7.070000 |
| 2 | Bihar | 12.110000 | 36.806667 |
| 3 | Chhattisgarh | 8.523333 | 9.380000 |
| 4 | Delhi | 18.036667 | 25.713333 |
# Change in each state's average unemployment rate, in percentage points,
# between the pre-lockdown and lockdown windows.
# The previous expression `after - before / before` performed the division
# first, so it reduced to `after - 1` and ignored each state's baseline.
lockdown['rate change in unemployment'] = (
    lockdown['Unemployment Rate After Lockdown']
    - lockdown['Unemployment Rate Before Lockdown']
).round(2)
# NOTE(review): for a relative change use (after - before) / before * 100.
# The percentage-point difference is used because it stays on the scale of
# the 10/20/30 thresholds in sort_impact.
plot_per = lockdown.sort_values('rate change in unemployment')

# Bar chart of the change per state, smallest change first.
fig = px.bar(plot_per, x='state', y='rate change in unemployment', color='state',
             title='percentage point change in Unemployment in each state after lockdown', template='ggplot2')
fig.show()
Most Impacted States/Union Territories
Puducherry
Jharkhand
Bihar
Haryana
Tripura
🔒🚫Impact of Lockdown on Employment Across States
# function to sort value based on impact
def sort_impact(x):
    """Map a change in unemployment rate to an impact-severity label.

    Buckets (upper bounds inclusive):
        x <= 10       -> 'impacted States'
        10 < x <= 20  -> 'hard impacted States'
        20 < x <= 30  -> 'harder impacted States'
        x > 30        -> 'hardest impacted States'

    Values above 46 used to fall through and come back as the raw number,
    which mixed floats and labels in the same column; they now land in the
    top bucket. NaN compares False against every bound and is returned
    unchanged.
    """
    if x <= 10:
        return 'impacted States'
    if x <= 20:
        return 'hard impacted States'
    if x <= 30:
        return 'harder impacted States'
    if x > 30:
        return 'hardest impacted States'
    # Only reached for values that are not comparable, such as NaN.
    return x
# Label every state with its impact bucket.
plot_per['impact status'] = plot_per['rate change in unemployment'].apply(sort_impact)

# Horizontal bar chart of the change per state, coloured by impact bucket.
fig = px.bar(
    plot_per,
    y='state',
    x='rate change in unemployment',
    color='impact status',
    title='Impact of lockdown on employment across states',
    template='ggplot2',
    height=650,
)
fig.show()
✨FUTURE RECOMMENDATIONS ✨
Here's a brief summary of the unemployment analysis in India from January to October 2020 and future recommendations:
Key Insights
Significant Lockdown Impact: Unemployment rates sharply increased across all regions, peaking in April-May 2020 due to the nationwide lockdown.
Varied Regional and State Impact: While all areas were affected, some regions (e.g., North) and states (e.g., Puducherry, Jharkhand, Tamil Nadu, Bihar, Karnataka) experienced a more severe surge and varied recovery trajectories. States like Haryana and Tripura showed consistently higher overall unemployment, while Meghalaya and Sikkim had lower rates.
Gradual Recovery: Post-lockdown, unemployment rates gradually declined, but many areas had not returned to pre-lockdown levels by October 2020.
Future Recommendations
Targeted Programs: Implement state-specific employment and re-skilling programs, focusing on the most affected states and sectors.
Business Support: Provide financial and logistical aid to vulnerable MSMEs and labor-intensive industries.
Social Safety Nets: Enhance unemployment benefits and income support programs.
Regional/Rural Focus: Prioritize rural development and non-agricultural job creation to balance growth.
Data-Driven Policies: Continuously analyze data for agile policy responses.
Promote Entrepreneurship: Support entrepreneurship with easier credit and mentorship.